#---- Replication script for analysis found in the main paper ----

#Turnbull-Dugarte, López-Ortega & Hunklinger (2024)
#"Do citizens stereotype Muslims as the Illiberal 'Bogeyman'? Evidence from a Double-List Experiment"

remove(list=ls())

#install.packages("pacman")
library(pacman)

p_load(here, tidyverse, haven, modelsummary, jtools,
       list, ggpubr, survey, xtable, interactions, viridisLite,
       ggdist, patchwork, estimatr, margins, grf, starbility, miceadds)



colors <- viridis(option = "plasma", 
                  begin = 0.2, 
                  end = 0.8, 
                  direction = -1, 
                  n = 4)


##Loading data##
list_dataNL <- read_dta("data/listNL.dta")
list_dataNL <-as.data.frame(list_dataNL)
list_dataNL<- list_dataNL  %>% 
  mutate(treatfac=as.factor(treated),
         IDvar=as.factor(IDvar),
         homonat=as.factor(homonat),
         round.fac=as.factor(round))
listNL_A <- list_dataNL %>% 
  filter(round==1)
listNL_B <- list_dataNL %>% 
  filter(round==2)
list_dataNL <- subset(list_dataNL, !(is.na(weight)))

list_dataDE <- read_dta("data/listDE.dta")
list_dataDE <-as.data.frame(list_dataDE)
list_dataDE<- list_dataDE  %>% 
  mutate(treatfac=as.factor(treated),
         IDvar=as.factor(IDvar),
         homonat=as.factor(homonat),
         round.fac=as.factor(round))
listDE_A <- list_dataDE %>% 
  filter(round==1)
listDE_B <- list_dataDE %>% 
  filter(round==2)
list_dataDE <- subset(list_dataDE, !(is.na(weight)))

list_dataUK <- read_dta("data/listUK.dta")
list_dataUK <-as.data.frame(list_dataUK)
list_dataUK<- list_dataUK  %>% 
  mutate(treatfac=as.factor(treated),
         IDvar=as.factor(IDvar),
         homonat=as.factor(homonat),
         round.fac=as.factor(round))
listUK_A <- list_dataUK %>% 
  filter(round==1)
listUK_B <- list_dataUK %>% 
  filter(round==2)

list_dataUS <- read_dta("data/listUS.dta")
list_dataUS <-as.data.frame(list_dataUS)
list_dataUS<- list_dataUS  %>% 
  mutate(treatfac=as.factor(treated),
         IDvar=as.factor(IDvar),
         homonat=as.factor(homonat),
         round.fac=as.factor(round))
listUS_A <- list_dataUS %>% 
  filter(round==1)
listUS_B <- list_dataUS %>% 
  filter(round==2)


##FIGURE 1###

NL1<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
           respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
         weights=weight, data = list_dataNL)
cluster_se <- vcovCL(NL1, cluster = list_dataNL$IDvar)
NLtidy <- tidy(NL1, cluster = cluster_se)
NLtidy$cntry <- "Netherlands"

DE1<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
           respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
         weights=weight, data = list_dataDE)
cluster_se <- vcovCL(DE1, cluster = list_dataDE$IDvar)
DEtidy <- tidy(DE1, cluster = cluster_se)
DEtidy$cntry <- "Germany"

UK1<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
           respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = list_dataUK)
cluster_se <- vcovCL(UK1, cluster = list_dataUK$IDvar)
UKtidy <- tidy(UK1, cluster = cluster_se)
UKtidy$cntry <- "UK"
US1<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
           respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = list_dataUS)
cluster_se <- vcovCL(US1, cluster = list_dataUS$IDvar)
UStidy <- tidy(US1, cluster = cluster_se)
UStidy$cntry <- "USA"

models<- rbind(NLtidy, DEtidy, UKtidy, UStidy)
models<-subset(models, term %in% c("treatfac1")) 

models$lower90 <- models$estimate - (1.645 * models$std.error)
models$higher90 <- models$estimate + (1.645 * models$std.error)
models$lower95 <- models$estimate - (1.96 * models$std.error)
models$higher95 <- models$estimate + (1.96 * models$std.error)
models$lower99 <- models$estimate - (2.58 * models$std.error)
models$higher99 <- models$estimate + (2.58 * models$std.error)
models$lower999 <- models$estimate - (3.29 * models$std.error)
models$higher999 <- models$estimate + (3.29 * models$std.error)

NL1A<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
          weights=weight, data = listNL_A)
NLtidyA <- tidy(NL1A)
NLtidyA$cntry <- "Netherlands"

DE1A<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
          weights=weight, data = listDE_A)
DEtidyA <- tidy(DE1A)
DEtidyA$cntry <- "Germany"

UK1A<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = listUK_A)
UKtidyA <- tidy(UK1A)
UKtidyA$cntry <- "UK"

US1A<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = listUS_A)
UStidyA <- tidy(US1A)
UStidyA$cntry <- "USA"

modelsA<- rbind(NLtidyA, DEtidyA, UKtidyA, UStidyA)
modelsA<-subset(modelsA, term %in% c("treatfac1")) 

modelsA$lower90 <- modelsA$estimate - (1.645 * modelsA$std.error)
modelsA$higher90 <- modelsA$estimate + (1.645 * modelsA$std.error)
modelsA$lower95 <- modelsA$estimate - (1.96 * modelsA$std.error)
modelsA$higher95 <- modelsA$estimate + (1.96 * modelsA$std.error)
modelsA$lower99 <- modelsA$estimate - (2.58 * modelsA$std.error)
modelsA$higher99 <- modelsA$estimate + (2.58 * modelsA$std.error)
modelsA$lower999 <- modelsA$estimate - (3.29 * modelsA$std.error)
modelsA$higher999 <- modelsA$estimate + (3.29 * modelsA$std.error)

NL1B<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
          weights=weight, data = listNL_B)
NLtidyB <- tidy(NL1B)
NLtidyB$cntry <- "Netherlands"

DE1B<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, 
          weights=weight, data = listDE_B)
DEtidyB <- tidy(DE1B)
DEtidyB$cntry <- "Germany"

UK1B<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = listUK_B)
UKtidyB <- tidy(UK1B)
UKtidyB$cntry <- "UK"

US1B<- lm(value ~ treatfac + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb + respondent_sexuality, data = listUS_B)
UStidyB <- tidy(US1B)
UStidyB$cntry <- "USA"

modelsB<- rbind(NLtidyB, DEtidyB, UKtidyB, UStidyB)
modelsB<-subset(modelsB, term %in% c("treatfac1")) 

modelsB$lower90 <- modelsB$estimate - (1.645 * modelsB$std.error)
modelsB$higher90 <- modelsB$estimate + (1.645 * modelsB$std.error)
modelsB$lower95 <- modelsB$estimate - (1.96 * modelsB$std.error)
modelsB$higher95 <- modelsB$estimate + (1.96 * modelsB$std.error)
modelsB$lower99 <- modelsB$estimate - (2.58 * modelsB$std.error)
modelsB$higher99 <- modelsB$estimate + (2.58 * modelsB$std.error)
modelsB$lower999 <- modelsB$estimate - (3.29 * modelsB$std.error)
modelsB$higher999 <- modelsB$estimate + (3.29 * modelsB$std.error)



plot1<- ggplot(models, aes(y = reorder(cntry, -estimate), x=estimate, color=cntry)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_segment(aes(x=lower90, xend=higher90, y=cntry, yend=cntry), size=3.2)+ 
  geom_segment(aes(x=lower95, xend=higher95, y=cntry, yend=cntry), size=2.5, alpha=.8)+ 
  geom_segment(aes(x=lower99, xend=higher99, y=cntry, yend=cntry), size=2, alpha=.7)+
  geom_segment(aes(x=lower999, xend=higher999, y=cntry, yend=cntry), size=1.5, alpha=.6)+
  geom_point(fill="white", color="grey67", size=2.5,  pch=21)+
  xlim(-.1,1)+
  geom_label(aes(label = cntry), nudge_y=.2)+
  geom_label(aes(label=sprintf("ATE = %.2f", round(estimate, digits = 2))), nudge_y = -.15, size=2)+
  theme_bw()+
  labs(x="",
       title="Double List")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.title=element_text(color="black", face="bold", hjust=.5),
        axis.text.x=element_text(color="black"),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())


plot2<- ggplot(modelsA, aes(y = reorder(cntry, -estimate), x=estimate, color=cntry)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_segment(aes(x=lower90, xend=higher90, y=cntry, yend=cntry), size=3.2)+ 
  geom_segment(aes(x=lower95, xend=higher95, y=cntry, yend=cntry), size=2.5, alpha=.8)+ 
  geom_segment(aes(x=lower99, xend=higher99, y=cntry, yend=cntry), size=2, alpha=.7)+
  geom_segment(aes(x=lower999, xend=higher999, y=cntry, yend=cntry), size=1.5, alpha=.6)+
  geom_point(fill="white", color="grey67", size=2.5,  pch=21)+
  xlim(-.1,1)+
  geom_label(aes(label = cntry), nudge_y=.2)+
  geom_label(aes(label=sprintf("ATE = %.2f", round(estimate, digits = 2))), nudge_y = -.15, size=2)+
  theme_bw()+
  labs(x="Estimated treatment effect\n(covariate adjusted)",
       title="List A")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.title=element_text(color="black", face="bold", hjust=.5),
        axis.text.x=element_text(color="black"),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())


plot3<- ggplot(modelsB, aes(y = reorder(cntry, -estimate), x=estimate, color=cntry)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_segment(aes(x=lower90, xend=higher90, y=cntry, yend=cntry), size=3.2)+ 
  geom_segment(aes(x=lower95, xend=higher95, y=cntry, yend=cntry), size=2.5, alpha=.8)+ 
  geom_segment(aes(x=lower99, xend=higher99, y=cntry, yend=cntry), size=2, alpha=.7)+
  geom_segment(aes(x=lower999, xend=higher999, y=cntry, yend=cntry), size=1.5, alpha=.6)+
  geom_point(fill="white", color="grey67", size=2.5,  pch=21)+
  xlim(-.1,1)+
  geom_label(aes(label = cntry), nudge_y=.2)+
  geom_label(aes(label=sprintf("ATE = %.2f", round(estimate, digits = 2))), nudge_y = -.15, size=2)+
  theme_bw()+
  labs(x="",
       title="List B")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.title=element_text(color="black", face="bold", hjust=.5),
        axis.text.x=element_text(color="black"),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

plot1+plot2+plot3+
  plot_annotation(title="Modelling the prelevance of sensitive item in (double) list experiment",
                  caption= "Confidence intervals at 90%, 95%, 99%, and 99.9%. Dual-list confidence intervals clustered by inidividual respondent.") &
  theme(plot.title=element_text(color="black", face="bold"))
ggsave("Figure1.png", 
       path = here("figures"),
       dpi=600)


###FIGURE 2###

list_dataNL<- list_dataNL  %>% 
  mutate(respondent_sexuality=as.factor(respondent_sexuality))
list_dataDE<- list_dataDE  %>% 
  mutate(respondent_sexuality=as.factor(respondent_sexuality))
list_dataUK<- list_dataUK  %>% 
  mutate(respondent_sexuality=as.factor(respondent_sexuality))
list_dataUS<- list_dataUS  %>% 
  mutate(respondent_sexuality=as.factor(respondent_sexuality))

NL1X<- lm(value ~ treatfac*respondent_sexuality + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb, 
          weights=weight, data = list_dataNL)
NLtidyX <- tidy(NL1X)
NLtidyX$cntry <- "Netherlands"
marg_NL <-
  NL1X %>%
  margins(at = list(respondent_sexuality = c("0", "1"))) %>%
  summary %>%
  as.data.frame() %>%
  filter(factor == "treatfac1")
marg_NL$cntry <- "NL"


DE1X<- lm(value ~ treatfac*respondent_sexuality + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb, 
          weights=weight, data = list_dataDE)
DEtidyX <- tidy(DE1X)
DEtidyX$cntry <- "Germany"

marg_DE <-
  DE1X %>%
  margins(at = list(respondent_sexuality = c("0", "1"))) %>%
  summary %>%
  as.data.frame() %>%
  filter(factor == "treatfac1")
marg_DE$cntry <- "DE"


UK1X<- lm(value ~ treatfac*respondent_sexuality + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb, 
          data = list_dataUK)
UKtidyX <- tidy(UK1X)
UKtidyX$cntry <- "UK"

marg_UK <-
  UK1X %>%
  margins(at = list(respondent_sexuality = c("0", "1"))) %>%
  summary %>%
  as.data.frame() %>%
  filter(factor == "treatfac1")
marg_UK$cntry <- "UK"

US1X<- lm(value ~ treatfac*respondent_sexuality + respondent_age + respondent_age2 + respondent_gender + respondent_degree +
            respondent_ideology + affective_muslims + affective_lgb, 
          data = list_dataUS)
UStidyX <- tidy(US1X)
UStidyX$cntry <- "USA"

marg_US <-
  US1X %>%
  margins(at = list(respondent_sexuality = c("0", "1"))) %>%
  summary %>%
  as.data.frame() %>%
  filter(factor == "treatfac1")
marg_US$cntry <- "USA"


CATE<- rbind(marg_NL, marg_DE, marg_UK, marg_US)
CATE_hetero<-subset(CATE, respondent_sexuality %in% c("0")) 
CATE_LGBT<-subset(CATE, respondent_sexuality %in% c("1")) 

CATE_hetero$lower90 <- CATE_hetero$AME - (1.645 * CATE_hetero$SE)
CATE_hetero$higher90 <- CATE_hetero$AME + (1.645 * CATE_hetero$SE)
CATE_hetero$lower95 <- CATE_hetero$AME - (1.96 * CATE_hetero$SE)
CATE_hetero$higher95 <- CATE_hetero$AME + (1.96 * CATE_hetero$SE)
CATE_hetero$lower99 <- CATE_hetero$AME - (2.58 * CATE_hetero$SE)
CATE_hetero$higher99 <- CATE_hetero$AME + (2.58 * CATE_hetero$SE)
CATE_hetero$lower999 <- CATE_hetero$AME - (3.29 * CATE_hetero$SE)
CATE_hetero$higher999 <- CATE_hetero$AME + (3.29 * CATE_hetero$SE)


CATE_LGBT$lower90 <- CATE_LGBT$AME - (1.645 * CATE_LGBT$SE)
CATE_LGBT$higher90 <- CATE_LGBT$AME + (1.645 * CATE_LGBT$SE)
CATE_LGBT$lower95 <- CATE_LGBT$AME - (1.96 * CATE_LGBT$SE)
CATE_LGBT$higher95 <- CATE_LGBT$AME + (1.96 * CATE_LGBT$SE)
CATE_LGBT$lower99 <- CATE_LGBT$AME - (2.58 * CATE_LGBT$SE)
CATE_LGBT$higher99 <- CATE_LGBT$AME + (2.58 * CATE_LGBT$SE)
CATE_LGBT$lower999 <- CATE_LGBT$AME - (3.29 * CATE_LGBT$SE)
CATE_LGBT$higher999 <- CATE_LGBT$AME + (3.29 * CATE_LGBT$SE)


CATE<- rbind(CATE_hetero, CATE_LGBT)
CATE<- CATE  %>% 
  mutate(respondent_sexuality=as.factor(respondent_sexuality))
CATE$respondent_sexuality<- factor(CATE$respondent_sexuality,
                                   levels = c(0, 1),
                                   labels = c("Cis-heterosexual respondents", "LGBTQ+ respondents"))


panel1<- ggplot(CATE, aes(y = reorder(cntry, -AME), x=AME, color=cntry)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_segment(aes(x=lower90, xend=higher90, y=cntry, yend=cntry), size=3.2)+ 
  geom_segment(aes(x=lower95, xend=higher95, y=cntry, yend=cntry), size=2.5, alpha=.8)+ 
  geom_segment(aes(x=lower99, xend=higher99, y=cntry, yend=cntry), size=2, alpha=.7)+
  geom_segment(aes(x=lower999, xend=higher999, y=cntry, yend=cntry), size=1.5, alpha=.6)+
  geom_point(fill="white", color="grey67", size=2.5,  pch=21)+
  geom_label(aes(label = cntry), nudge_y=.2)+
  geom_label(aes(label=sprintf("CATE = %.2f", round(AME, digits = 2))), nudge_y = -.15, size=2)+
  theme_bw() +
  facet_wrap(~respondent_sexuality)+
  labs(x="Conditional average treatment effect (CATE)",
       title="(No) Treatment Heteogeneity Between Cis-Hetosexual & LGBTQ+ Respondents",
       caption= "Confidence intervals at 90%, 95%, 99%, and 99.9%")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.title=element_text(color="black", face="bold"),
        axis.text.x=element_text(color="black"),
        axis.title.x=element_text(color="black", face="bold"),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.background = element_blank(), 
        strip.text = element_text(face = "bold", size=10))

panel1

ggsave("Figure2.png", 
       path = here("figures"),
       dpi=600)



###FIGURE 3####

list_dataNL$cntry <- "Netherlands"
list_dataDE$cntry <- "Germany"
list_dataUK$cntry <- "UK"
list_dataUS$cntry <- "US"

list_dataNL<- list_dataNL  %>% 
  mutate(respondent_sexualityN=as.numeric(respondent_sexuality))
list_dataDE<- list_dataDE  %>% 
  mutate(respondent_sexualityN=as.numeric(respondent_sexuality))
list_dataUK<- list_dataUK  %>% 
  mutate(respondent_sexualityN=as.numeric(respondent_sexuality))
list_dataUS<- list_dataUS  %>% 
  mutate(respondent_sexualityN=as.numeric(respondent_sexuality))

list_df <- list(list_dataNL, list_dataDE, list_dataUK, list_dataUS)
fulldf <- bind_rows(list_df)




covs <- c("respondent_age", "respondent_gender", "respondent_degree", "respondent_ideology", "affective_muslims", 
          "affective_lgb", "respondent_sexualityN")

index <- with(fulldf, complete.cases(value, treated))
X <- fulldf[index, c(covs, 'cntry')]

X$study1 <- as.numeric(X$cntry == "Germany")
X$study2 <- as.numeric(X$cntry == "Netherlands")
X$study3 <- as.numeric(X$cntry == "UK")
X$study4 <- as.numeric(X$cntry == "US")
X <- subset(X, select = -cntry)

X <- as.matrix(X)
Y_Obs <- fulldf$value[index]
W <- fulldf$treated[index]
Z <- fulldf$treated[index]

set.seed(123)
tau_forest <- instrumental_forest(Y = Y_Obs, X = X, W = W, Z = Z, 
                                  honesty = TRUE, num.trees = 4000, seed = 123)

tau_hat_forest <- predict(tau_forest, estimate.variance = TRUE)

sigma_hat <- sqrt(tau_hat_forest$variance.estimates)

grf_df <- 
  cbind(data.frame(
    tau_hat = tau_hat_forest$predictions,
    ci_upper = tau_hat_forest$predictions + 1.96 * sigma_hat,
    ci_lower = tau_hat_forest$predictions - 1.96 * sigma_hat),
    X
  )


grf_df$order <- with(grf_df, rank(tau_hat, ties.method = "first"))
grf_df$study <- NA
grf_df$study[grf_df$study1 == 1] <- "Germany"
grf_df$study[grf_df$study2 == 1] <- "Netherlands"
grf_df$study[grf_df$study3 == 1] <- "UK"
grf_df$study[grf_df$study4 == 1] <- "US"

plot_df <-
  grf_df %>%
  group_by(study) %>%
  mutate(order = rank(tau_hat, ties.method = "first")) %>%
  ungroup()

grf_plot <- 
  grf_df %>% 
  ggplot(., aes(x = tau_hat, y = order, color = study)) +
  facet_wrap(~study)+
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  geom_vline(xintercept = .5, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper),
                 alpha = 0.15, height = 0, size = 0.5) + 
  geom_point(size = 1, pch = 20) +
  labs(title = "",
       x = "Causal forest estimated treatment effects", 
       y = "Individual trees in causal forect",
       caption= "Confidence intervals at 95%")+
  theme_bw()+
  theme(strip.background = element_blank(),
        strip.text = element_text(color="black", size=10, face="bold", hjust=.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

grf_plot

ggsave("Figure3.png",
       path = here("figures"),
       height=18, width=24, unit="cm", dpi=800)
